package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.Bits;  // javadocs
import org.apache.lucene.util.IOUtils;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicInteger;

/**
 IndexReader is an abstract class, providing an interface for accessing a
 point-in-time view of an index.  Any changes made to the index
 via {@link IndexWriter} will not be visible until a new
 {@code IndexReader} is opened.  It's best to use {@link
 DirectoryReader#open(IndexWriter,boolean)} to obtain an
 {@code IndexReader}, if your {@link IndexWriter} is
 in-process.  When you need to re-open to see changes to the
 index, it's best to use {@link DirectoryReader#openIfChanged(DirectoryReader)}
 since the new reader will share resources with the previous
 one when possible.  Search of an index is done entirely
 through this abstract interface, so that any subclass which
 implements it is searchable.

 <p>There are two different types of IndexReaders:
 <ul>
  <li>{@link LeafReader}: These indexes do not consist of several sub-readers,
  they are atomic. They support retrieval of stored fields, doc values, terms,
  and postings.
  <li>{@link CompositeReader}: Instances (like {@link DirectoryReader})
  of this reader can only
  be used to get stored fields from the underlying LeafReaders,
  but it is not possible to directly retrieve postings. To do that, get
  the sub-readers via {@link CompositeReader#getSequentialSubReaders}.
  Alternatively, you can mimic a {@link LeafReader} (with a serious slowdown),
  by wrapping composite readers with {@link SlowCompositeReaderWrapper}.
 </ul>

 <p>IndexReader instances for indexes on disk are usually constructed
 with a call to one of the static <code>DirectoryReader.open()</code> methods,
 e.g. {@link DirectoryReader#open(org.apache.lucene.store.Directory)}. {@link DirectoryReader} implements
 the {@link CompositeReader} interface, it is not possible to directly get postings.

 <p> For efficiency, in this API documents are often referred to via
 <i>document numbers</i>, non-negative integers which each name a unique
 document in the index.  These document numbers are ephemeral -- they may change
 as documents are added to and deleted from an index.  Clients should thus not
 rely on a given document having the same number between sessions.

 <p>
 <a name="thread-safety"></a><p><b>NOTE</b>: {@link
 IndexReader} instances are completely thread
 safe, meaning multiple threads can call any of its methods,
 concurrently.  If your application requires external
 synchronization, you should <b>not</b> synchronize on the
 <code>IndexReader</code> instance; use your own
 (non-Lucene) objects instead.
*/
public abstract class IndexReader implements Closeable {

  // Set once close() has run, or once decRef() has dropped the refCount to 0.
  private boolean closed = false;
  // Set by a child reader (see reportCloseToParentReaders) when that child is closed.
  private boolean closedByChild = false;
  // Starts at 1: the creator of the reader holds the initial reference.
  private final AtomicInteger refCount = new AtomicInteger(1);

  IndexReader() {
    if (!(this instanceof CompositeReader || this instanceof LeafReader)) {
      throw new Error("IndexReader should never be directly extended, subclass LeafReader or CompositeReader instead.");
    }
  }

  /**
   * A custom listener that's invoked when the IndexReader
   * is closed.
   *
   * @lucene.experimental
   */
  public static interface ReaderClosedListener {
    /** Invoked when the {@link IndexReader} is closed. */
    public void onClose(IndexReader reader) throws IOException;
  }

  // Insertion-ordered so that listeners are notified in the order they were added.
  private final Set<ReaderClosedListener> readerClosedListeners =
      Collections.synchronizedSet(new LinkedHashSet<ReaderClosedListener>());

  // Weakly referenced so that registering a parent does not keep it alive.
  private final Set<IndexReader> parentReaders =
      Collections.synchronizedSet(Collections.newSetFromMap(new WeakHashMap<IndexReader,Boolean>()));

  /** Expert: adds a {@link ReaderClosedListener}.  The
   * provided listener will be invoked when this reader is closed.
   * At this point, it is safe for apps to evict this reader from
   * any caches keyed on {@link #getCombinedCoreAndDeletesKey()}.
   *
   * @lucene.experimental */
  public final void addReaderClosedListener(ReaderClosedListener listener) {
    ensureOpen();
    readerClosedListeners.add(listener);
  }

  /** Expert: remove a previously added {@link ReaderClosedListener}.
   *
   * @lucene.experimental */
  public final void removeReaderClosedListener(ReaderClosedListener listener) {
    ensureOpen();
    readerClosedListeners.remove(listener);
  }

  /** Expert: This method is called by {@code IndexReader}s which wrap other readers
   * (e.g. {@link CompositeReader} or {@link FilterLeafReader}) to register the parent
   * at the child (this reader) on construction of the parent. When this reader is closed,
   * it will mark all registered parents as closed, too. The references to parent readers
   * are weak only, so they can be GCed once they are no longer in use.
   * @lucene.experimental */
  public final void registerParentReader(IndexReader reader) {
    ensureOpen();
    parentReaders.add(reader);
  }

  /**
   * Invokes every registered {@link ReaderClosedListener} and then rethrows the
   * first failure, if any. Failures from later listeners are attached to the
   * first one as suppressed exceptions, so every listener runs even when an
   * earlier one (or {@link #doClose()}) failed.
   *
   * @param th a failure that already happened while closing, or {@code null}
   * @throws IOException if the first recorded failure is an {@code IOException}
   */
  private void notifyReaderClosedListeners(Throwable th) throws IOException {
    synchronized (readerClosedListeners) {
      for (ReaderClosedListener listener : readerClosedListeners) {
        try {
          listener.onClose(this);
        } catch (Throwable t) {
          if (th == null) {
            th = t;
          } else {
            th.addSuppressed(t);
          }
        }
      }
      // Propagate an IOException as itself so that callers of decRef()/close(),
      // which both declare IOException, can actually catch it. Previously this
      // method declared no checked exceptions, so an IOException could never
      // reach the caller as an IOException.
      if (th instanceof IOException) {
        throw (IOException) th;
      }
      // Everything else (including the null "no failure" case) is handled as before.
      IOUtils.reThrowUnchecked(th);
    }
  }

  /** Marks all registered parent readers, transitively, as closed by a child. */
  private void reportCloseToParentReaders() {
    synchronized (parentReaders) {
      for (IndexReader parent : parentReaders) {
        parent.closedByChild = true;
        // cross memory barrier by a fake write:
        parent.refCount.addAndGet(0);
        // recurse:
        parent.reportCloseToParentReaders();
      }
    }
  }

  /** Expert: returns the current refCount for this reader */
  public final int getRefCount() {
    // NOTE: don't ensureOpen, so that callers can see
    // refCount is 0 (reader is closed)
    return refCount.get();
  }

  /**
   * Expert: increments the refCount of this IndexReader
   * instance.  RefCounts are used to determine when a
   * reader can be closed safely, i.e. as soon as there are
   * no more references.  Be sure to always call a
   * corresponding {@link #decRef}, in a finally clause;
   * otherwise the reader may never be closed.  Note that
   * {@link #close} simply calls decRef(), which means that
   * the IndexReader will not really be closed until {@link
   * #decRef} has been called for all outstanding
   * references.
   *
   * @see #decRef
   * @see #tryIncRef
   */
  public final void incRef() {
    if (!tryIncRef()) {
      ensureOpen();
    }
  }

  /**
   * Expert: increments the refCount of this IndexReader
   * instance only if the IndexReader has not been closed yet
   * and returns <code>true</code> iff the refCount was
   * successfully incremented, otherwise <code>false</code>.
   * If this method returns <code>false</code> the reader is either
   * already closed or is currently being closed. Either way this
   * reader instance shouldn't be used by an application unless
   * <code>true</code> is returned.
   * <p>
   * RefCounts are used to determine when a
   * reader can be closed safely, i.e. as soon as there are
   * no more references.  Be sure to always call a
   * corresponding {@link #decRef}, in a finally clause;
   * otherwise the reader may never be closed.  Note that
   * {@link #close} simply calls decRef(), which means that
   * the IndexReader will not really be closed until {@link
   * #decRef} has been called for all outstanding
   * references.
   *
   * @see #decRef
   * @see #incRef
   */
  public final boolean tryIncRef() {
    int count;
    // Retry the compare-and-set until it succeeds or the count is no longer positive.
    while ((count = refCount.get()) > 0) {
      if (refCount.compareAndSet(count, count + 1)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Expert: decreases the refCount of this IndexReader
   * instance.  If the refCount drops to 0, then this
   * reader is closed: {@link #doClose()} is called, registered
   * parent readers are marked as closed, and the
   * {@link ReaderClosedListener}s are notified. The refCount is
   * decremented before {@link #doClose()} runs, so it stays
   * decremented even if closing fails.
   *
   * @throws IOException in case an IOException occurs in doClose()
   *         or in a {@link ReaderClosedListener}
   * @throws AlreadyClosedException if the refCount is already 0 or less
   *
   * @see #incRef
   */
  public final void decRef() throws IOException {
    // only check refcount here (don't call ensureOpen()), so we can
    // still close the reader if it was made invalid by a child:
    if (refCount.get() <= 0) {
      throw new AlreadyClosedException("this IndexReader is closed");
    }

    final int rc = refCount.decrementAndGet();
    if (rc == 0) {
      closed = true;
      Throwable throwable = null;
      try {
        doClose();
      } catch (Throwable th) {
        // remember the failure; parents and listeners must still be informed
        throwable = th;
      } finally {
        try {
          reportCloseToParentReaders();
        } finally {
          // rethrows 'throwable' (or the first listener failure), if any
          notifyReaderClosedListeners(throwable);
        }
      }
    } else if (rc < 0) {
      throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement");
    }
  }

  /**
   * Throws AlreadyClosedException if this IndexReader or any
   * of its child readers is closed, otherwise returns.
   */
  protected final void ensureOpen() throws AlreadyClosedException {
    if (refCount.get() <= 0) {
      throw new AlreadyClosedException("this IndexReader is closed");
    }
    // the happens before rule on reading the refCount, which must be after the fake write,
    // ensures that we see the value:
    if (closedByChild) {
      throw new AlreadyClosedException("this IndexReader cannot be used anymore as one of its child readers was closed");
    }
  }

  /** {@inheritDoc}
   * <p>For caching purposes, {@code IndexReader} subclasses are not allowed
   * to implement equals/hashCode, so methods are declared final.
   * To lookup instances from caches use {@link #getCoreCacheKey} and
   * {@link #getCombinedCoreAndDeletesKey}.
   */
  @Override
  public final boolean equals(Object obj) {
    return (this == obj);
  }

  /** {@inheritDoc}
   * <p>For caching purposes, {@code IndexReader} subclasses are not allowed
   * to implement equals/hashCode, so methods are declared final.
   * To lookup instances from caches use {@link #getCoreCacheKey} and
   * {@link #getCombinedCoreAndDeletesKey}.
   */
  @Override
  public final int hashCode() {
    return System.identityHashCode(this);
  }

  /** Retrieve term vectors for this document, or null if
   *  term vectors were not indexed.  The returned Fields
   *  instance acts like a single-document inverted index
   *  (the docID will be 0). */
  public abstract Fields getTermVectors(int docID)
          throws IOException;

  /** Retrieve term vector for this document and field, or
   *  null if term vectors were not indexed.  The returned
   *  Fields instance acts like a single-document inverted
   *  index (the docID will be 0). */
  public final Terms getTermVector(int docID, String field)
    throws IOException {
    Fields vectors = getTermVectors(docID);
    if (vectors == null) {
      return null;
    }
    return vectors.terms(field);
  }

  /** Returns the number of documents in this index. */
  public abstract int numDocs();

  /** Returns one greater than the largest possible document number.
   * This may be used to, e.g., determine how big to allocate an array which
   * will have an element for every document number in an index.
   */
  public abstract int maxDoc();

  /** Returns the number of deleted documents. */
  public final int numDeletedDocs() {
    return maxDoc() - numDocs();
  }

  /** Expert: visits the fields of a stored document, for
   *  custom processing/loading of each field.  If you
   *  simply want to load all fields, use {@link
   *  #document(int)}.  If you want to load a subset, use
   *  {@link DocumentStoredFieldVisitor}.  */
  public abstract void document(int docID, StoredFieldVisitor visitor) throws IOException;

  /**
   * Returns the stored fields of the <code>n</code><sup>th</sup>
   * <code>Document</code> in this index.  This is just
   * sugar for using {@link DocumentStoredFieldVisitor}.
   * <p>
   * <b>NOTE:</b> for performance reasons, this method does not check if the
   * requested document is deleted, and therefore asking for a deleted document
   * may yield unspecified results. Usually this is not required, however you
   * can test if the doc is deleted by checking the {@link
   * Bits} returned from {@link MultiFields#getLiveDocs}.
   *
   * <b>NOTE:</b> only the content of a field is returned,
   * if that field was stored during indexing.  Metadata
   * like boost, omitNorm, IndexOptions, tokenized, etc.,
   * are not preserved.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  // TODO: we need a separate StoredField, so that the
  // Document returned here contains that class not
  // IndexableField
  public final Document document(int docID) throws IOException {
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    document(docID, visitor);
    return visitor.getDocument();
  }

  /**
   * Like {@link #document(int)} but only loads the specified
   * fields.  Note that this is simply sugar for {@link
   * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
   */
  public final Document document(int docID, Set<String> fieldsToLoad)
      throws IOException {
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(
        fieldsToLoad);
    document(docID, visitor);
    return visitor.getDocument();
  }

  /** Returns true if any documents have been deleted. Implementers should
   *  consider overriding this method if {@link #maxDoc()} or {@link #numDocs()}
   *  are not constant-time operations. */
  public boolean hasDeletions() {
    return numDeletedDocs() > 0;
  }

  /**
   * Closes files associated with this index.
   * Also saves any new deletions to disk.
   * No other methods should be called after this has been called.
   * @throws IOException if there is a low-level IO error
   */
  @Override
  public final synchronized void close() throws IOException {
    // Only the first call releases this reader's own reference; later calls are no-ops.
    if (!closed) {
      decRef();
      closed = true;
    }
  }

  /** Implements close. */
  protected abstract void doClose() throws IOException;

  /**
   * Expert: Returns the root {@link IndexReaderContext} for this
   * {@link IndexReader}'s sub-reader tree.
   * <p>
   * Iff this reader is composed of sub
   * readers, i.e. this reader being a composite reader, this method returns a
   * {@link CompositeReaderContext} holding the reader's direct children as well as a
   * view of the reader tree's atomic leaf contexts. All sub-
   * {@link IndexReaderContext} instances referenced from this reader's top-level
   * context are private to this reader and are not shared with another context
   * tree. For example, IndexSearcher uses this API to drive searching by one
   * atomic leaf reader at a time. If this reader is not composed of child
   * readers, this method returns a {@link LeafReaderContext}.
   * <p>
   * Note: Any of the sub-{@link CompositeReaderContext} instances referenced
   * from this top-level context do not support {@link CompositeReaderContext#leaves()}.
   * Only the top-level context maintains the convenience leaf-view
   * for performance reasons.
   */
  public abstract IndexReaderContext getContext();

  /**
   * Returns the reader's leaves, or itself if this reader is atomic.
   * This is a convenience method calling {@code this.getContext().leaves()}.
   * @see IndexReaderContext#leaves()
   */
  public final List<LeafReaderContext> leaves() {
    return getContext().leaves();
  }

  /** Expert: Returns a key for this IndexReader, so CachingWrapperFilter can find
   * it again.
   * This key must not have equals()/hashCode() methods, so &quot;equals&quot; means &quot;identical&quot;. */
  public Object getCoreCacheKey() {
    // Don't call ensureOpen since FC calls this (to evict)
    // on close
    return this;
  }

  /** Expert: Returns a key for this IndexReader that also includes deletions,
   * so CachingWrapperFilter can find it again.
   * This key must not have equals()/hashCode() methods, so &quot;equals&quot; means &quot;identical&quot;. */
  public Object getCombinedCoreAndDeletesKey() {
    // Don't call ensureOpen since FC calls this (to evict)
    // on close
    return this;
  }

  /** Returns the number of documents containing the
   * <code>term</code>.  This method returns 0 if the term or
   * field does not exist.  This method does not take into
   * account deleted documents that have not yet been merged
   * away.
   * @see TermsEnum#docFreq()
   */
  public abstract int docFreq(Term term) throws IOException;

  /**
   * Returns the total number of occurrences of {@code term} across all
   * documents (the sum of the freq() for each doc that has this term). This
   * will be -1 if the codec doesn't support this measure. Note that, like other
   * term measures, this measure does not take deleted documents into account.
   */
  public abstract long totalTermFreq(Term term) throws IOException;

  /**
   * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field,
   * or -1 if this measure isn't stored by the codec. Note that, just like other
   * term measures, this measure does not take deleted documents into account.
   *
   * @see Terms#getSumDocFreq()
   */
  public abstract long getSumDocFreq(String field) throws IOException;

  /**
   * Returns the number of documents that have at least one term for this field,
   * or -1 if this measure isn't stored by the codec. Note that, just like other
   * term measures, this measure does not take deleted documents into account.
   *
   * @see Terms#getDocCount()
   */
  public abstract int getDocCount(String field) throws IOException;

  /**
   * Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this
   * field, or -1 if this measure isn't stored by the codec (or if this field
   * omits term freq and positions). Note that, just like other term measures,
   * this measure does not take deleted documents into account.
   *
   * @see Terms#getSumTotalTermFreq()
   */
  public abstract long getSumTotalTermFreq(String field) throws IOException;

}